library(ggplot2)
library(Cairo)
library(foreign)
library(gridExtra)
library(gtable)
library(scales)
library(data.table)
library(dplyr)
library(plyr)
library(openxlsx)
library(readstata13)
library(reshape2)
Sys.setlocale(,"CHS")


# setwd("C:/Users/yama8/Dropbox/WVS_regime_media_values/Democracy and science/DATA/Replication")



listdele<-function(x){
  a<-x[complete.cases(x),]
  return(a)
}

theme_set(theme_bw() + theme(legend.position="bottom",
                             legend.direction="horizontal",
                             axis.title.x=element_text(vjust=-.5),
                             axis.text.y =  element_text(colour = "black", hjust = 0 , vjust=.5 ),
                             title=element_text(face="bold",vjust=1.5),
                             strip.text=element_text(size=13),
                             plot.title = element_text(hjust = 0.5)))


pd<-position_dodge(width=0.4)
cl<-c("steelblue","orange")


################################
########## Figure 1 ############
################################

avg<--.0001751  # average trust in science

######## Least Educated [Only Top and Botton 10] ########

d<-read.dta13("[F]DATA_country_level.dta")
d<-d[which(is.na(d$y_ledu)==F & is.na(d$y_hedu)==F & is.na(d$e_polity2)==F),]
d$demo<-factor(d$demo)
ord<-d$country[order(-d$y_ledu)]
d$country<-factor(d$country, levels=ord)


d2<-d[order(d$y_ledu),]
d2x<-d2[c(1:10,(nrow(d2)-9):nrow(d2)),]
d2x$lab<-factor(rep(c("Bottom 10","Top 10"),each=10))

gl<-ggplot(d2x,aes(x=country,y=y_ledu,color=demo,fill=demo))+
  geom_point(size=3,shape=24)+coord_flip()+facet_wrap(.~lab,scales = "free_y")+geom_hline(yintercept=avg,linetype="dotted")+
  scale_color_manual("",values=c("black","#D7191C"),labels=c("Non-democracy","Democracy"))+xlab("")+
  scale_fill_manual("",values=c("white","#D7191C"),labels=c("Non-democracy","Democracy"))+ylab("Trust in Science (IRT)")+theme(axis.text.y = element_text(size=9))+
  ggtitle("Subsample: Primary Education or Below")


######## College Educated [Only Top and Botton 10] ########

d<-read.dta13("[F]DATA_country_level.dta")
d<-d[which(is.na(d$y_ledu)==F & is.na(d$y_hedu)==F & is.na(d$e_polity2)==F),]
d$demo<-factor(d$demo)
ord<-d$country[order(-d$y_hedu)]
d$country<-factor(d$country, levels=ord)


d2<-d[order(d$y_hedu),]
d2x<-d2[c(1:10,(nrow(d2)-9):nrow(d2)),]
d2x$lab<-factor(rep(c("Bottom 10","Top 10"),each=10))

gh<-ggplot(d2x,aes(x=country,y=y_hedu,color=demo,fill=demo))+
  geom_point(size=3,shape=21)+coord_flip()+facet_wrap(.~lab,scales = "free_y")+geom_hline(yintercept=avg,linetype="dotted")+
  scale_color_manual("",values=c("black","#D7191C"),labels=c("Non-democracy","Democracy"))+
  scale_fill_manual("",values=c("white","#D7191C"),labels=c("Non-democracy","Democracy"))+xlab("")+ylab("Trust in Science (IRT)")+theme(axis.text.y = element_text(size=9))+
  ggtitle("Subsample: College or Above")

#### Combined Graph ###
library(gridExtra)
library(gtable)
library(grid)
g1<-ggplotGrob(gl)
g2<-ggplotGrob(gh)

maxHeight<-unit.pmax(g1$heights[2:4],g2$heights[2:4])
maxWidths<-unit.pmax(g1$widths[2:8],g2$widths[2:8])


g1[["widths"]][2:8]<-g2[["widths"]][2:8]<-maxWidths



jpeg("[F]figure_1.jpg",res = 1000, height= 7000, width = 7000 )
grid.arrange(g1,g2)
dev.off()





######################################################
##### Figure A.5 and A.6: Full Country Ranking #######
######################################################


# Figure A.5: order by low education
ord<-d$country[order(-d$y_ledu)]
d$country<-factor(d$country, levels=ord)

d$demo<-factor(d$demo)

d$demo2<-factor(as.numeric(d$e_polity2>=5))


d$top50<-factor(d$y_ledu>=median(d$y_ledu),levels=c(FALSE,TRUE),labels=c("Bottom 50%","Top 50%"))

gfl<-ggplot(d,aes(x=country,y=y_ledu,color=demo,fill=demo))+
  geom_point(shape=24)+coord_flip()+facet_wrap(.~top50,scales = "free_y")+geom_hline(yintercept=avg,linetype="dotted")+
  scale_color_manual("",values=c("black","#D7191C"),labels=c("Non-democracy","Democracy"))+
  scale_fill_manual("",values=c("white","#D7191C"),labels=c("Non-democracy","Democracy"))+xlab("")+ylab("Trust in Science (IRT)")+theme(axis.text.y = element_text(size=9))

pdf("[F]figure_A5.pdf",height=9,width=6)
gfl
dev.off()




# Figure A.6: order by high education 
ord<-d$country[order(-d$y_hedu)]
d$country<-factor(d$country, levels=ord)

d$demo<-factor(d$demo)

d$top50<-factor(d$y_hedu>=median(d$y_hedu),levels=c(FALSE,TRUE),labels=c("Bottom 50%","Top 50%"))

gfh<-ggplot(d,aes(x=country,y=y_hedu,color=demo,fill=demo))+
  geom_point(shape=21)+coord_flip()+facet_wrap(.~top50,scales = "free_y")+geom_hline(yintercept=avg,linetype="dotted")+
  scale_color_manual("",values=c("black","#D7191C"),labels=c("Non-democracy","Democracy"))+
  scale_fill_manual("",values=c("white","#D7191C"),labels=c("Non-democracy","Democracy"))+xlab("")+ylab("Trust in Science (IRT)")+theme(axis.text.y = element_text(size=9))

pdf("[F]figure_A6.pdf",height=9,width=6)
gfh
dev.off()






##################################################### 
########### Figure 2: Unpack Regime Type   ##########
#####################################################
library(ggstance)

### Cross sectional 
db<-read.xlsx("beta_cs.xlsx",colNames = F)
dc<-read.xlsx("cov_cs.xlsx",colNames = F)
dx1<-data.frame(beta=t(db)[,1],
                se=sqrt(diag(as.matrix(dc))))


### Cross cohort 
db<-read.xlsx("beta.xlsx",colNames = F)
dc<-read.xlsx("cov.xlsx",colNames = F)
dx2<-data.frame(beta=t(db)[,1],
                se=sqrt(diag(as.matrix(dc))))


dx<-rbind(dx1,dx2)
dx$lb95<-dx$beta-1.96*dx$se
dx$ub95<-dx$beta+1.96*dx$se
dx$regime<-factor(rep(rep(c("Personalist","Single-Party","Military","Monarchy"),each=3),2),levels=c("Personalist","Single-Party","Military","Monarchy"),
                  labels=c("Democracy -\nPersonalist Regime","Democracy -\nSinge-Party Regime","Democracy -\nMilitary Regime","Democracy -\nMonarchy"))

dx$type<-factor(rep(c("Cross-sectional","Cross-cohort"),each=12),levels=rev(c("Cross-sectional","Cross-cohort")),labels=rev(c("Cross-sectional estimation","Cross-cohort estimation")))

dx$edu<-factor(rep(c("Primary or below","Secondary","College or above"),8), labels=rev(c("Primary or below","Secondary","College or above")), levels = rev(c("Primary or below","Secondary","College or above")))


col.cs<-rgb(157,157,130,maxColorValue = 255)
col.ch<-rgb(74,75,157,maxColorValue = 255)

gr<-ggplot(dx,aes(x=beta,y=edu,group=type,color=type,shape=type))+geom_pointrange(aes(xmin=lb95,xmax=ub95),position=position_dodgev(height=0.4))+
  facet_wrap(.~regime,ncol=4)+xlab("")+ylab("")+
  geom_vline(xintercept=0,linetype="dotted")+
  scale_color_manual("",breaks=c("Cross-sectional estimation","Cross-cohort estimation"),values=c(col.cs,col.ch))+
  scale_shape_manual("",breaks=c("Cross-sectional estimation","Cross-cohort estimation"),values=c(19,17))+
  theme(axis.text.y = element_text(size=14),legend.text = element_text(size=14))


jpeg("[F]figure_2.jpg",res = 1000, height=4000,width=10000)
gr
dev.off()





###################################################################
################ Figure A.1: Summary Statistics ###################
###################################################################
library(openxlsx)
library(scales)
library(plyr)

d<-read.dta13("[F]DATA_distribution.dta")
dv<-read.xlsx("science_var_label.xlsx")

dv$varlab<-gsub("\\\\n","\n",dv$varlab)
dv$varlab<-factor(dv$varlab,levels=dv$varlab)

d$religious_vs_science<-factor(d$religious_vs_science,labels=c("Religion","It depends","Science"))

gsub("\\\\n","\n",c("dfdfdf\\ndfdfdf"))
vars<-dv$name

d2<-data.table(d[,c("edu",vars)])
d2$id<-1:nrow(d2)
dm<-melt.data.table(d2,measure.vars = vars)

dm$value<-factor(dm$value,levels=c("Increase","Neither/Have no effect","Decrease","A lot","Most","Some","Not much","Very few" ,"Not at all","Science","It depends","Religion","Yes","No"))




dm2<-merge(dm,dv,by.x="variable", by.y="name")

dm3<-dm2[is.na(dm2$edu)==F & is.na(dm2$value)==F, ]


dfq<-ddply(dm3,.(varlab, value),summarise,
           freq=sum(is.na(value)==F))

dfq<-ddply(dfq,.(varlab),transform,
           prop=freq/sum(freq))
dfq$pct.lab<-paste(round(dfq$prop*100,1),"%")

answer.4.l<-c(0.55,1.55,2.55,3.55)
answer.4.r<-c(1.45,2.45,3.45,4.45)

answer.3.l<-c(0.55,1.55,2.55)
answer.3.r<-c(1.45,2.45,3.45)

answer.2.l<-c(0.55,1.55)
answer.2.r<-c(1.45,2.45)


dfq$xleft<-c(rep(answer.4.l,8),rep(answer.3.l,1),rep(answer.2.l,2),rep(answer.3.l,2))

dfq$xright<-c(rep(answer.4.r,8),rep(answer.3.r,1),rep(answer.2.r,2),rep(answer.3.r,2))

dm3$edu<-factor(dm3$edu)

gbar2<-ggplot(dm3,aes(x=value))+geom_bar(aes(y=..prop..,fill=edu,group=edu),position="dodge")+facet_wrap(.~varlab,scales = "free_x",ncol=3)+geom_text(aes(y=..prop..,group=edu, label= scales::percent(..prop..,accuracy=1)),stat= "count", vjust = -.5, position=position_dodge(width=1),size=2)+
  scale_y_continuous(limits=c(0,1.3))+xlab("")+ylab("")+
  scale_fill_manual("",values=c("red","orange","steelblue"),labels=c("Primary","Secondary","Tertiary"))+
  geom_segment(data=dfq,aes(x=xleft,xend=xleft,y=.9,yend=1.1),color="blue")+
  geom_segment(data=dfq,aes(x=xright,xend=xright,y=.9,yend=1.1),color="blue")+
  geom_segment(data=dfq,aes(x=xleft,xend=xright,y=1.1,yend=1.1),color="blue")+
  geom_text(data=dfq,aes(x=value,y=1.25,label=pct.lab),color="blue",size=2)

CairoPDF("[F]figure_A1.pdf",height=12,width=11)
gbar2
dev.off()



########################################################## 
############## Figure A.2 WGM vs. WVS ####################
########################################################## 
library(countrycode)
library(haven)

dwvs<-read.dta13("[F]DATA_wvs_religion_byedu.dta")
dwvs$iso3<-countrycode(dwvs$country,origin="country.name",destination="iso3c")
dwvs$science.wvs<-5-dwvs$religous_alwaysright

dy<-read.dta13("[F]DATA_wgm_y2_byedu.dta")
dy$edulevel2 <- dy$edu


dy<-dy[is.na(dy$edulevel2)==F,]

dc<-merge(dwvs,dy,by=c("iso3","edulevel2"))
dc$facetlab<-factor(dc$edulevel2,labels = c("Primary or below","Secondary","College or above"))


f1<-lm(religious_vs_science~science.wvs,data=dc,subset=edulevel2==1)
f2<-lm(religious_vs_science~science.wvs,data=dc,subset=edulevel2==2)
f3<-lm(religious_vs_science~science.wvs,data=dc,subset=edulevel2==3)


beta<-paste("beta == ","'",round(c(coef(f1)[2],coef(f2)[2],coef(f3)[2]),2),"***'",sep="")

danno<-data.frame(b=beta,
                  religious_vs_science=.27,
                  science.wvs=4.7,
                  facetlab=factor(c("Primary or below","Secondary","College or above"),levels=c("Primary or below","Secondary","College or above")))


gv1<-ggplot(dc,aes(x=religious_vs_science,y=science.wvs,label=iso3))+
  geom_text(check_overlap = TRUE,size=3)+
  geom_smooth(method="lm")+
  facet_wrap(.~facetlab,ncol=3)+xlab("Trust Science over Religion (WGM)")+
  ylab("Trust Science over Religion (WVS 2010-14)")+
  geom_text(data=danno,label=beta,parse=T)


pdf("[F]figure_A2.pdf",height=4,width=12)
gv1
dev.off()


##########################################################
############## Figure A.3 WGM vs. Pew ####################
##########################################################
dwvs<-read.dta13("[F]DATA_pew_scitrust.dta")

dy<-read.dta13("[F]DATA_wgm_y2.dta")

dc<-merge(dwvs,dy,by="iso3")

dc$t_scientist_cb<-(3*dc$t_scientist_alot+2*dc$t_scientist_some+dc$t_scientist_no)/600




f<-lm((t_scientist_cb)~trust_scientist,data=dc)
beta<-paste("beta == ","'",round(coef(f)[2],2),"***'",sep="")


gpew<-ggplot(dc,aes(x=y2,y=t_scientist_cb,label=iso3))+
  geom_smooth(method="lm")+
  geom_text(check_overlap = TRUE,size=3)+
  annotate("text",x=-0.27,y=.4,label=beta,parse=T,size=6)+
  xlab("Trust in Scientists (WGM)")+
  ylab("Trust in Scientists (Pew)")



pdf("[F]figure_A3.pdf",height=5,width=8)
gpew
dev.off()





######## Figure A.4: PCA Analysis ################
dv<-read.xlsx("science_var_label.xlsx")
d<-read.dta13("DATA_WP_jiang_wan_main.dta")
ddem<-sapply(d[,dv$name],as.numeric)

pca<-prcomp(na.omit(ddem))
ve<-(pca$sdev)^2/sum((pca$sdev)^2)
data<-data.frame(pc=paste("PC",1:13,sep=" "),ve=ve)
data$pct<-paste(round(data$ve*100,2),"%",sep="")
data$pc<-factor(data$pc,levels=data$pc)

gdem<-ggplot(data,aes(x=pc,y=ve))+geom_bar(stat="identity",fill="steelblue")+geom_text(aes(label=pct,y=ve+.01))+
  xlab("")+ylab("% Variance Explained")+ggtitle("PCA Components")+scale_y_continuous(limits=c(0,0.5))

pdf("[F]figure_A4.pdf",width=7,height=5)
gdem
dev.off()






###########################################################################################
########### Figure A.7: Summary Statistics for Cohort-based Variations  ###################
###########################################################################################


##### Variations in Democratic Exposure (Heat Map) ########

d<-read.dta13("[F]DATA_cohort_exposure.dta")
d$exposure<-d$dexp_demo6_14after

df<-expand.grid(country=unique(d$country),birthyear=min(d$birthyear,na.rm=T):2000,stringsAsFactors = F)


df2<-merge(df,d,by=c("country","birthyear"),all.x=T)
df2$country<-factor(df2$country,levels=rev(unique(df2$country)))


library(RColorBrewer)

gv<-ggplot(df2,aes(x=birthyear,y=country,fill=exposure))+
  geom_tile(colour="black",size=0.25)+
  scale_fill_gradient("As of 2018, % of life (after age 14)\n lived under democracy",low="white",high="#D7191C",na.value = "grey70")+
  ylab("")+xlab("Birth Year")+
  scale_x_continuous(expand=c(0,0),
                   breaks=seq(1920,2000,10))+
  theme(plot.background=element_blank(),
        panel.border=element_blank(),
        axis.ticks=element_line(size=0.4))

 
CairoPDF("[F]figure_A7.pdf",height=16,width=13)
gv
dev.off()




##############################################################################
############ Figure A.8: Results from Individual Questions  ##################
##############################################################################
library(ggstance)

dv<-read.xlsx("science_var_label.xlsx")
dv$varlab<-gsub("\\\\n","\n",dv$varlab)
dv$varlab<-factor(dv$varlab,levels=dv$varlab)
files<-dir("./result_indiQ")


dlist<-list()
for(i in 1:length(files)){
  d<-listdele(read.csv(paste("./result_indiQ/",files[i],sep=""),stringsAsFactors = F))
  ty<-substring(files[i],1,2)
  
  d$type<-ifelse(ty=="cs","Cross-sectional","Cross-cohort")
  d$var<-gsub("^cs_|^ch_|\\.csv$","",files[i])
  dlist[[i]]<-d
  
}
dx<-rbindlist(dlist)

dx2<-merge(dx,dv,by.x="var",by.y="name")
dx2$edu<-factor(dx2$rn, labels=rev(c("Primary or below","Secondary","College or above")), levels = rev(c("primary","secondary","tertiary")))
dx2$type<-factor(dx2$type,levels=rev(c("Cross-sectional","Cross-cohort")),labels=rev(c("Cross-sectional estimation","Cross-cohort estimation")))
# Define colors #
col.cs<-rgb(157,157,130,maxColorValue = 255)
col.ch<-rgb(74,75,157,maxColorValue = 255)

# Plot #
gq<-ggplot(dx2,aes(x=beta,y=edu,group=type,color=type,shape=edu))+geom_pointrange(aes(xmin=lb95,xmax=ub95),position=position_dodgev(height=0.4))+facet_wrap(.~varlab,ncol=3)+xlab("")+ylab("")+
  geom_vline(xintercept=0,linetype="dotted")+
  scale_color_manual("",breaks=c("Cross-sectional estimation","Cross-cohort estimation"),values=c(col.cs,col.ch))+
  scale_shape_manual("",values=c(19,15,17))+
  guides(shape="none")+theme(axis.text.y = element_text(size=14),
                             legend.text = element_text(size=14))


pdf("[F]figure_A8.pdf",height=13,width=10.5)
gq
dev.off()




########################################################################################
############ Figure A.9: Results from Additional Science-Related Q's  ##################
########################################################################################
library(ggstance)

dv<-read.xlsx("otherscience_var_label.xlsx")
dv$varlab<-gsub("\\\\n","\n",dv$varlab)
dv$varlab<-factor(dv$varlab,levels=dv$varlab)


files<-dir("./result_otherscience")


dlist<-list()
for(i in 1:length(files)){
  d<-listdele(read.csv(paste("./result_otherscience/",files[i],sep=""),stringsAsFactors = F))
  ty<-substring(files[i],1,2)
  
  d$type<-ifelse(ty=="cs","Cross-sectional","Cross-cohort")
  d$var<-gsub("^cs_|^ch_|\\.csv$","",files[i])
  dlist[[i]]<-d
  
}
dx<-rbindlist(dlist)

dx2<-merge(dx,dv,by.x="var",by.y="name")
dx2$edu<-factor(dx2$rn, labels=rev(c("Primary or below","Secondary","College or above")), levels = rev(c("primary","secondary","tertiary")))
dx2$type<-factor(dx2$type,levels=rev(c("Cross-sectional","Cross-cohort")),labels=rev(c("Cross-sectional estimation","Cross-cohort estimation")))
# Define colors #
col.cs<-rgb(157,157,130,maxColorValue = 255)
col.ch<-rgb(74,75,157,maxColorValue = 255)

# Plot #
gq<-ggplot(dx2,aes(x=beta,y=edu,group=type,color=type,shape=edu))+geom_pointrange(aes(xmin=lb95,xmax=ub95),position=position_dodgev(height=0.5))+facet_wrap(.~varlab,ncol=2)+xlab("")+ylab("")+
  geom_vline(xintercept=0,linetype="dotted")+
  scale_color_manual("",breaks=c("Cross-sectional estimation","Cross-cohort estimation"),values=c(col.cs,col.ch))+
  scale_shape_manual("",values=c(19,15,17))+
  guides(shape=F)+theme(axis.text.y = element_text(size=14),
                        legend.text = element_text(size=14))+
  theme(plot.margin = margin(t = 0, r = 10, b = 0, l = 0, unit = "pt"))


pdf("[F]figure_A9.pdf",height=6,width=9)
gq
dev.off()



